{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.设置driver"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-56-67977d54cc27>:10: DeprecationWarning: use options instead of chrome_options\n",
      "  driver = webdriver.Chrome( chrome_options = opts) #desired_capabilities=caps,\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.desired_capabilities import DesiredCapabilities\n",
    "import time\n",
    "opts = webdriver.ChromeOptions()\n",
    "opts.add_argument('--no-sandbox')#解决DevToolsActivePort文件不存在的报错\n",
    "opts.add_argument('window-size=1920x3000') #指定浏览器分辨率\n",
    "opts.add_argument('--disable-gpu') #谷歌文档提到需要加上一这个属性来规避bug\n",
    "opts.add_argument('--hide-scrollbars') #隐藏滚动条, 应对些特殊页面\n",
    "\n",
    "driver = webdriver.Chrome( chrome_options = opts) #desired_capabilities=caps,"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 打开CNKI\n",
    "* 1. 校园网，自动登录  cnki.net\n",
    "* 2. 校外网，需要登录  fsso.cnki.net"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.get('https://cnki.net')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 检查是否是中山大学南方学院登录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'中山大学南...'"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "driver.find_element_by_id('Ecp_loginShowName1').get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 打开高级检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_id('highSearch')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 检查窗口\n",
    "\n",
    "* 眼见不一定为实\n",
    "* 只要是多个窗口，请检查窗口位置信息\n",
    "* 窗口信息根据窗口加载的顺序按照列表存储，每一个窗口有唯一识别的窗口ID"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CDwindow-D4320400309D47CFC52A67B72D2B0F65'"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 当前窗口ID\n",
    "driver.current_window_handle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-D4320400309D47CFC52A67B72D2B0F65',\n",
       " 'CDwindow-C2572F5ED47B44CC1003C6F7F3411504']"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 所有窗口ID\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-37-704fdf3805c1>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# 切换窗口\n",
    "driver.switch_to_window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 点击学术期刊\n",
    "element = driver.find_element_by_xpath('//ul[@class=\"doctype-menus keji\"]/li[@data-id=\"xsqk\"]/a')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 点击专业检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_name('majorSearch')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 选择期刊来源"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "## SCI来源期刊\n",
    "\n",
    "element = driver.find_element_by_xpath('//div[@class=\"extend-tit-labels\"]//input[@key=\"SI\"]')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "## EI来源期刊\n",
    "\n",
    "element = driver.find_element_by_xpath('//div[@class=\"extend-tit-labels\"]//input[@key=\"EI\"]')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 北大核心\n",
    "\n",
    "element = driver.find_element_by_xpath('//div[@class=\"extend-tit-labels\"]//input[@key=\"HX\"]')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "## CSSCI\n",
    "\n",
    "element = driver.find_element_by_xpath('//div[@class=\"extend-tit-labels\"]//input[@key=\"CSI\"]')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "## CSCD\n",
    "\n",
    "element = driver.find_element_by_xpath('//div[@class=\"extend-tit-labels\"]//input[@key=\"CSD\"]')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 设置搜索query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = 'SU = \"平台\" AND  (TI =\"人工智能\" OR  TI =\"大数据\"  OR TI = \"AI\" OR TI = \"big data\")'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('//textarea')\n",
    "element.clear()\n",
    "element.send_keys(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('//input[@value=\"检索\"]')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 检查检索信息，更换页面文章数量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'共找到<em>18,704</em>条结果'"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 检索数量\n",
    "\n",
    "element = driver.find_element_by_xpath('//span[@class=\"pagerTitleCell\"]')\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('//i[@class=\"icon icon-sort\"]')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 选择页数框\n",
    "element = driver.find_element_by_xpath('//div[@id = \"perPageDiv\"]//li[@data-val=\"50\"]/a')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 抓取页面信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>基于大数据的数字化电能计量误差分析  网络首发</td>\n",
       "      <td>田园; 张梅; 保富; 原野</td>\n",
       "      <td>电测与仪表</td>\n",
       "      <td>2021-06-17 10:38</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>大数据背景下基层党组织建设的困境与解决路径</td>\n",
       "      <td>周青</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>人工智能驾驶技术商业化分析</td>\n",
       "      <td>孙吴炎</td>\n",
       "      <td>办公自动化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>大数据智能审计助推企业高质量发展</td>\n",
       "      <td>楚文光; 胡为民; 雷年桢; 陈赛霞</td>\n",
       "      <td>中国内部审计</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>大数据背景下食用菌企业精准营销路径研究</td>\n",
       "      <td>范文婷</td>\n",
       "      <td>北方经贸</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>人工智能背景下英语教学模式改革研究</td>\n",
       "      <td>程红梅</td>\n",
       "      <td>湖北开放职业学院学报</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>大数据背景下的SIOP教学模式在高职英语课堂中的运用研究</td>\n",
       "      <td>伍齐珊</td>\n",
       "      <td>湖北开放职业学院学报</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧</td>\n",
       "      <td>周兴</td>\n",
       "      <td>中国建设信息化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>基于大数据时代的计算机网络安全技术应用分析</td>\n",
       "      <td>唐培全; 戴小佳</td>\n",
       "      <td>网络安全技术与应用</td>\n",
       "      <td>2021-06-11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>319.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>基于随机森林算法的电力计量大数据分析平台研究</td>\n",
       "      <td>文耀宽; 王献军; 王峻; 苏沛</td>\n",
       "      <td>计算机技术与发展</td>\n",
       "      <td>2021-06-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>打造数字湾区：粤港澳大湾区大数据中心建设的关键问题与路径建构</td>\n",
       "      <td>曾坚朋; 王建冬; 黄倩倩; 易成岐; 郭巧敏</td>\n",
       "      <td>电子政务</td>\n",
       "      <td>2021-06-09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>237.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>基于“大数据+AI”构建全程智能风控体系</td>\n",
       "      <td>李小庆</td>\n",
       "      <td>金融科技时代</td>\n",
       "      <td>2021-06-09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>224.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>大数据背景下河南省安全生产应急管理共享平台研究</td>\n",
       "      <td>郝艳艳</td>\n",
       "      <td>电脑与信息技术</td>\n",
       "      <td>2021-06-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>83.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>蔬菜精准管理与安全监控大数据平台总体框架设计</td>\n",
       "      <td>曾志康;韦健;李敏;郭元元;莫小香</td>\n",
       "      <td>安徽农业科学</td>\n",
       "      <td>2021-06-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>大数据背景下财务会计向管理会计转型探讨</td>\n",
       "      <td>陈婷</td>\n",
       "      <td>中国乡镇企业会计</td>\n",
       "      <td>2021-06-07</td>\n",
       "      <td>NaN</td>\n",
       "      <td>310.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>国网湖南电力：电力大数据科学服务于贫困户返贫预警</td>\n",
       "      <td>NaN</td>\n",
       "      <td>农村电工</td>\n",
       "      <td>2021-06-06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>深化大数据应用 优化电力营商环境</td>\n",
       "      <td>庞利鹏; 赵亮; 李坤</td>\n",
       "      <td>农村电工</td>\n",
       "      <td>2021-06-06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>基于百度AI Studio的机器学习课程教学设计与实践</td>\n",
       "      <td>徐国艳; 周彬; 刘磊; 李轩涯</td>\n",
       "      <td>计算机教育</td>\n",
       "      <td>2021-06-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>116.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>新工科背景下人工智能实践类课程教学模式创新研究</td>\n",
       "      <td>鲍鹏; 邢薇薇; 卢苇; 李轩涯</td>\n",
       "      <td>计算机教育</td>\n",
       "      <td>2021-06-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>220.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>产教融合背景下开展高校人工智能师资培训的实践探索</td>\n",
       "      <td>罗丽; 涂涛; 计湘婷; 李轩涯</td>\n",
       "      <td>计算机教育</td>\n",
       "      <td>2021-06-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>104.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                              篇名                       作者  \\\n",
       "0            1         基于大数据的数字化电能计量误差分析  网络首发           田园; 张梅; 保富; 原野   \n",
       "1            2           大数据背景下基层党组织建设的困境与解决路径                       周青   \n",
       "2            3                   人工智能驾驶技术商业化分析                      孙吴炎   \n",
       "3            4                大数据智能审计助推企业高质量发展       楚文光; 胡为民; 雷年桢; 陈赛霞   \n",
       "4            5             大数据背景下食用菌企业精准营销路径研究                      范文婷   \n",
       "5            6               人工智能背景下英语教学模式改革研究                      程红梅   \n",
       "6            7    大数据背景下的SIOP教学模式在高职英语课堂中的运用研究                      伍齐珊   \n",
       "7            8   从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧                       周兴   \n",
       "8            9           基于大数据时代的计算机网络安全技术应用分析                 唐培全; 戴小佳   \n",
       "9           10          基于随机森林算法的电力计量大数据分析平台研究         文耀宽; 王献军; 王峻; 苏沛   \n",
       "10          11  打造数字湾区：粤港澳大湾区大数据中心建设的关键问题与路径建构  曾坚朋; 王建冬; 黄倩倩; 易成岐; 郭巧敏   \n",
       "11          12            基于“大数据+AI”构建全程智能风控体系                      李小庆   \n",
       "12          13         大数据背景下河南省安全生产应急管理共享平台研究                      郝艳艳   \n",
       "13          14          蔬菜精准管理与安全监控大数据平台总体框架设计        曾志康;韦健;李敏;郭元元;莫小香   \n",
       "14          15             大数据背景下财务会计向管理会计转型探讨                       陈婷   \n",
       "15          16        国网湖南电力：电力大数据科学服务于贫困户返贫预警                      NaN   \n",
       "16          17                深化大数据应用 优化电力营商环境              庞利鹏; 赵亮; 李坤   \n",
       "17          18     基于百度AI Studio的机器学习课程教学设计与实践         徐国艳; 周彬; 刘磊; 李轩涯   \n",
       "18          19         新工科背景下人工智能实践类课程教学模式创新研究         鲍鹏; 邢薇薇; 卢苇; 李轩涯   \n",
       "19          20        产教融合背景下开展高校人工智能师资培训的实践探索         罗丽; 涂涛; 计湘婷; 李轩涯   \n",
       "\n",
       "                刊名              发表时间  被引     下载  操作  \n",
       "0            电测与仪表  2021-06-17 10:38 NaN    NaN  下载  \n",
       "1   中小企业管理与科技(中旬刊)        2021-06-15 NaN   16.0  下载  \n",
       "2            办公自动化        2021-06-15 NaN   18.0  下载  \n",
       "3           中国内部审计        2021-06-15 NaN    3.0  下载  \n",
       "4             北方经贸        2021-06-15 NaN    NaN  下载  \n",
       "5       湖北开放职业学院学报        2021-06-15 NaN    NaN  下载  \n",
       "6       湖北开放职业学院学报        2021-06-15 NaN    NaN  下载  \n",
       "7          中国建设信息化        2021-06-15 NaN    NaN  下载  \n",
       "8        网络安全技术与应用        2021-06-11 NaN  319.0  下载  \n",
       "9         计算机技术与发展        2021-06-10 NaN   17.0  下载  \n",
       "10            电子政务        2021-06-09 NaN  237.0  下载  \n",
       "11          金融科技时代        2021-06-09 NaN  224.0  下载  \n",
       "12         电脑与信息技术        2021-06-08 NaN   83.0  下载  \n",
       "13          安徽农业科学        2021-06-08 NaN    NaN  下载  \n",
       "14        中国乡镇企业会计        2021-06-07 NaN  310.0  下载  \n",
       "15            农村电工        2021-06-06 NaN    8.0  下载  \n",
       "16            农村电工        2021-06-06 NaN   17.0  下载  \n",
       "17           计算机教育        2021-06-05 NaN  116.0  下载  \n",
       "18           计算机教育        2021-06-05 NaN  220.0  下载  \n",
       "19           计算机教育        2021-06-05 NaN  104.0  下载  "
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "element = driver.find_element_by_id('gridTable')\n",
    "含有页面主要数据的表格_HTML = element.get_attribute('innerHTML')\n",
    "pd.read_html(含有页面主要数据的表格_HTML)[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 翻页"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "### 自动化账号密码登录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'下一页'"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 翻页\n",
    "element = driver.find_element_by_id('PageNext')\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1/300'"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 跳转上限\n",
    "element = driver.find_element_by_xpath('//span[@class=\"countPageMark\"]')\n",
    "page_str = element.get_attribute('innerHTML')\n",
    "page_str"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['1', '300']"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "page_int = page_str.split('/')\n",
    "page_int"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300]\n"
     ]
    }
   ],
   "source": [
    "pages = list(range(1,int(page_int[1])+1))\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "pages = list(range(1,19))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "表格_html = dict()\n",
    "main_content =\"\"\n",
    "element = None\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 函数（翻页）\n",
    "def process_pages (pages):\n",
    "    for p in pages:\n",
    "        print (p,end='\\t')\n",
    "        # 定位到“下一页”的按钮 ——> 点击\n",
    "        跳转 = driver.find_element_by_id('PageNext')\n",
    "        跳转.click()\n",
    "        # 设定休息的时间 ——> 避免爬虫被禁报错、以及出现验证码\n",
    "        time.sleep(30+20*random())\n",
    "        # 获取含有页面主要数据的表格\n",
    "        element = driver.find_element_by_id('gridTable')\n",
    "        main_content = element.get_attribute('innerHTML')\n",
    "        表格_html[p] = main_content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\t"
     ]
    },
    {
     "ename": "WebDriverException",
     "evalue": "Message: chrome not reachable\n  (Session info: chrome=91.0.4472.106)\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mWebDriverException\u001b[0m                        Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-59-ffac2da82a61>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mprocess_pages\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpages\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-54-5631358d98d1>\u001b[0m in \u001b[0;36mprocess_pages\u001b[1;34m(pages)\u001b[0m\n\u001b[0;32m      4\u001b[0m         \u001b[0mprint\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mp\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'\\t'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m         \u001b[1;31m# 定位到“下一页”的按钮 ——> 点击\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 6\u001b[1;33m         \u001b[0m跳转\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_element_by_id\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'PageNext'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      7\u001b[0m         \u001b[0m跳转\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclick\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      8\u001b[0m         \u001b[1;31m# 设定休息的时间 ——> 避免爬虫被禁报错、以及出现验证码\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\cc\\annaconda\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mfind_element_by_id\u001b[1;34m(self, id_)\u001b[0m\n\u001b[0;32m    358\u001b[0m             \u001b[0melement\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_element_by_id\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'foo'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    359\u001b[0m         \"\"\"\n\u001b[1;32m--> 360\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_element\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mBy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mID\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mid_\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    361\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    362\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mfind_elements_by_id\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mid_\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\cc\\annaconda\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mfind_element\u001b[1;34m(self, by, value)\u001b[0m\n\u001b[0;32m    974\u001b[0m                 \u001b[0mby\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mBy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCSS_SELECTOR\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    975\u001b[0m                 \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'[name=\"%s\"]'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 976\u001b[1;33m         return self.execute(Command.FIND_ELEMENT, {\n\u001b[0m\u001b[0;32m    977\u001b[0m             \u001b[1;34m'using'\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mby\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    978\u001b[0m             'value': value})['value']\n",
      "\u001b[1;32mD:\\cc\\annaconda\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m    319\u001b[0m         \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    320\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 321\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    322\u001b[0m             response['value'] = self._unwrap_value(\n\u001b[0;32m    323\u001b[0m                 response.get('value', None))\n",
      "\u001b[1;32mD:\\cc\\annaconda\\lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m    240\u001b[0m                 \u001b[0malert_text\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'alert'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'text'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    241\u001b[0m             \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 242\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    244\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mWebDriverException\u001b[0m: Message: chrome not reachable\n  (Session info: chrome=91.0.4472.106)\n"
     ]
    }
   ],
   "source": [
    "process_pages(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame([表格_html]).T\n",
    "df.columns = [\"html_snippets\"]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "网站 = \"中国知网\"\n",
    "# 指定内容输出的位置\n",
    "fn = { \"output\" : { \"htm_snippets\": \"data_raw_src/知网_htm_snippets_{网站}.tsv\"}\n",
    "     }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "ename": "NoSuchElementException",
     "evalue": "Message: no such element: Unable to locate element: {\"method\":\"css selector\",\"selector\":\"[id=\"changeVercode\"]\"}\n  (Session info: chrome=90.0.4430.212)\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNoSuchElementException\u001b[0m                    Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-29-41d066440d38>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mimage_url\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_element_by_id\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'changeVercode'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_attribute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'src'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m \u001b[0mimage_url\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\users\\10144\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mfind_element_by_id\u001b[1;34m(self, id_)\u001b[0m\n\u001b[0;32m    358\u001b[0m             \u001b[0melement\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_element_by_id\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'foo'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    359\u001b[0m         \"\"\"\n\u001b[1;32m--> 360\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_element\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mBy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mID\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mid_\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    361\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    362\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mfind_elements_by_id\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mid_\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\users\\10144\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mfind_element\u001b[1;34m(self, by, value)\u001b[0m\n\u001b[0;32m    974\u001b[0m                 \u001b[0mby\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mBy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCSS_SELECTOR\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    975\u001b[0m                 \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'[name=\"%s\"]'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 976\u001b[1;33m         return self.execute(Command.FIND_ELEMENT, {\n\u001b[0m\u001b[0;32m    977\u001b[0m             \u001b[1;34m'using'\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mby\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    978\u001b[0m             'value': value})['value']\n",
      "\u001b[1;32mc:\\users\\10144\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m    319\u001b[0m         \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    320\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 321\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    322\u001b[0m             response['value'] = self._unwrap_value(\n\u001b[0;32m    323\u001b[0m                 response.get('value', None))\n",
      "\u001b[1;32mc:\\users\\10144\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m    240\u001b[0m                 \u001b[0malert_text\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'alert'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'text'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    241\u001b[0m             \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 242\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    244\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNoSuchElementException\u001b[0m: Message: no such element: Unable to locate element: {\"method\":\"css selector\",\"selector\":\"[id=\"changeVercode\"]\"}\n  (Session info: chrome=90.0.4430.212)\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# 拿下来这张图片\n",
    "\n",
    "    \n",
    "image_url = driver.find_element_by_id('changeVercode').get_attribute('src')\n",
    "image_url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "ename": "NoSuchElementException",
     "evalue": "Message: no such element: Unable to locate element: {\"method\":\"css selector\",\"selector\":\"[id=\"PageNext\"]\"}\n  (Session info: chrome=90.0.4430.212)\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNoSuchElementException\u001b[0m                    Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-49-7dd7fa4f501d>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[1;31m#         pass\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m \u001b[1;31m#     else:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m         \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_element_by_id\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'PageNext'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclick\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      8\u001b[0m         \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m4\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\users\\10144\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mfind_element_by_id\u001b[1;34m(self, id_)\u001b[0m\n\u001b[0;32m    358\u001b[0m             \u001b[0melement\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_element_by_id\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'foo'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    359\u001b[0m         \"\"\"\n\u001b[1;32m--> 360\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_element\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mBy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mID\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mid_\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    361\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    362\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mfind_elements_by_id\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mid_\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\users\\10144\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mfind_element\u001b[1;34m(self, by, value)\u001b[0m\n\u001b[0;32m    974\u001b[0m                 \u001b[0mby\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mBy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCSS_SELECTOR\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    975\u001b[0m                 \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'[name=\"%s\"]'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 976\u001b[1;33m         return self.execute(Command.FIND_ELEMENT, {\n\u001b[0m\u001b[0;32m    977\u001b[0m             \u001b[1;34m'using'\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mby\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    978\u001b[0m             'value': value})['value']\n",
      "\u001b[1;32mc:\\users\\10144\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m    319\u001b[0m         \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    320\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 321\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    322\u001b[0m             response['value'] = self._unwrap_value(\n\u001b[0;32m    323\u001b[0m                 response.get('value', None))\n",
      "\u001b[1;32mc:\\users\\10144\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m    240\u001b[0m                 \u001b[0malert_text\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'alert'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'text'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    241\u001b[0m             \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 242\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    244\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNoSuchElementException\u001b[0m: Message: no such element: Unable to locate element: {\"method\":\"css selector\",\"selector\":\"[id=\"PageNext\"]\"}\n  (Session info: chrome=90.0.4430.212)\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "# 循环翻页,验证码\n",
    "for i in range(0,100):\n",
    "#     if driver.find_element_by_id('changeVercode'):\n",
    "#         pass\n",
    "#     else:    \n",
    "        driver.find_element_by_id('PageNext').click()\n",
    "        time.sleep(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>481</td>\n",
       "      <td>中央电视台网络新媒体的探索与实践</td>\n",
       "      <td>汪文斌</td>\n",
       "      <td>电视研究</td>\n",
       "      <td>2007-03-05</td>\n",
       "      <td>4.0</td>\n",
       "      <td>127</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>482</td>\n",
       "      <td>谈当前新媒体发展时期广播和网络的融合</td>\n",
       "      <td>徐立谦</td>\n",
       "      <td>新闻知识</td>\n",
       "      <td>2007-02-28</td>\n",
       "      <td>1.0</td>\n",
       "      <td>172</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>483</td>\n",
       "      <td>北京城市学院“网络传播与新媒体”专业2007年毕业44人</td>\n",
       "      <td>NaN</td>\n",
       "      <td>现代教育技术</td>\n",
       "      <td>2007-02-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>54</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>484</td>\n",
       "      <td>北京城市学院“网络传播与新媒体”专业</td>\n",
       "      <td>NaN</td>\n",
       "      <td>现代教育技术</td>\n",
       "      <td>2007-01-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>123</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>485</td>\n",
       "      <td>传统图书与网络的新媒体出版趋势</td>\n",
       "      <td>曹欣渊</td>\n",
       "      <td>编辑学刊</td>\n",
       "      <td>2006-12-15</td>\n",
       "      <td>14.0</td>\n",
       "      <td>384</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>486</td>\n",
       "      <td>新媒体视野中的网络文学</td>\n",
       "      <td>金振邦</td>\n",
       "      <td>东北师大学报</td>\n",
       "      <td>2005-09-20</td>\n",
       "      <td>25.0</td>\n",
       "      <td>1645</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>487</td>\n",
       "      <td>世界平面广告设计精选——新媒体及网络广告类</td>\n",
       "      <td>杜羿纬</td>\n",
       "      <td>装饰</td>\n",
       "      <td>2005-08-01</td>\n",
       "      <td>4.0</td>\n",
       "      <td>376</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>488</td>\n",
       "      <td>“网络电视风暴”背后——兼论数字新媒体开发制度安排的重要性与紧迫性</td>\n",
       "      <td>高子华</td>\n",
       "      <td>中国广播电视学刊</td>\n",
       "      <td>2005-07-25</td>\n",
       "      <td>7.0</td>\n",
       "      <td>229</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>489</td>\n",
       "      <td>新媒体 新挑战 新发展——首届海峡两岸网络与影视经营管理研讨会综述</td>\n",
       "      <td>许正林,范荣霞,张政方</td>\n",
       "      <td>上海大学学报(社会科学版)</td>\n",
       "      <td>2003-01-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>180</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>490</td>\n",
       "      <td>建设网络新媒体促进广播电视与网络联姻</td>\n",
       "      <td>王建</td>\n",
       "      <td>中国有线电视</td>\n",
       "      <td>2001-09-25</td>\n",
       "      <td>1.0</td>\n",
       "      <td>75</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>491</td>\n",
       "      <td>发展新媒体的若干思考——由世界网络所想到的</td>\n",
       "      <td>林涛</td>\n",
       "      <td>中国广播电视学刊</td>\n",
       "      <td>2000-11-25</td>\n",
       "      <td>1.0</td>\n",
       "      <td>76</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>492</td>\n",
       "      <td>网络时代的对话与交流——新媒体技术2000年报告会内容纪要</td>\n",
       "      <td>钟新</td>\n",
       "      <td>国际新闻界</td>\n",
       "      <td>2000-09-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>235</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>493</td>\n",
       "      <td>专家说网络与电视组合成的“新媒体”将大行其道</td>\n",
       "      <td>NaN</td>\n",
       "      <td>新闻与成才</td>\n",
       "      <td>2000-08-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>42</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                                 篇名           作者             刊名  \\\n",
       "0          481                   中央电视台网络新媒体的探索与实践          汪文斌           电视研究   \n",
       "1          482                 谈当前新媒体发展时期广播和网络的融合          徐立谦           新闻知识   \n",
       "2          483       北京城市学院“网络传播与新媒体”专业2007年毕业44人          NaN         现代教育技术   \n",
       "3          484                 北京城市学院“网络传播与新媒体”专业          NaN         现代教育技术   \n",
       "4          485                    传统图书与网络的新媒体出版趋势          曹欣渊           编辑学刊   \n",
       "5          486                        新媒体视野中的网络文学          金振邦         东北师大学报   \n",
       "6          487              世界平面广告设计精选——新媒体及网络广告类          杜羿纬             装饰   \n",
       "7          488  “网络电视风暴”背后——兼论数字新媒体开发制度安排的重要性与紧迫性          高子华       中国广播电视学刊   \n",
       "8          489  新媒体 新挑战 新发展——首届海峡两岸网络与影视经营管理研讨会综述  许正林,范荣霞,张政方  上海大学学报(社会科学版)   \n",
       "9          490                 建设网络新媒体促进广播电视与网络联姻           王建         中国有线电视   \n",
       "10         491              发展新媒体的若干思考——由世界网络所想到的           林涛       中国广播电视学刊   \n",
       "11         492      网络时代的对话与交流——新媒体技术2000年报告会内容纪要           钟新          国际新闻界   \n",
       "12         493             专家说网络与电视组合成的“新媒体”将大行其道          NaN          新闻与成才   \n",
       "\n",
       "          发表时间    被引    下载  操作  \n",
       "0   2007-03-05   4.0   127  下载  \n",
       "1   2007-02-28   1.0   172  下载  \n",
       "2   2007-02-01   NaN    54  下载  \n",
       "3   2007-01-01   NaN   123  下载  \n",
       "4   2006-12-15  14.0   384  下载  \n",
       "5   2005-09-20  25.0  1645  下载  \n",
       "6   2005-08-01   4.0   376  下载  \n",
       "7   2005-07-25   7.0   229  下载  \n",
       "8   2003-01-15   NaN   180  下载  \n",
       "9   2001-09-25   1.0    75  下载  \n",
       "10  2000-11-25   1.0    76  下载  \n",
       "11  2000-09-25   NaN   235  下载  \n",
       "12  2000-08-15   NaN    42  下载  "
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "element = driver.find_element_by_id('gridTable')\n",
    "含有页面主要数据的表格_HTML = element.get_attribute('innerHTML')\n",
    "pd.read_html(含有页面主要数据的表格_HTML)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_table_list():\n",
    "        # 返回该元素定位的所有元素集合\n",
    "        totalcount=self.get_page_size(totalitem,pagesize)\n",
    "        logging.info(\"当前总页数是{0}\".format(totalcount))\n",
    "        list=[]\n",
    "        # 获取第一页的数据\n",
    "        self.wait_utilVisible(locator)\n",
    "        ele=self.driver.find_elements_by_xpath(locator)\n",
    "        for i in ele:\n",
    "            # 返回页面元素的文本内容\n",
    "            text=i.text\n",
    "            logging.info(\"该元素的内容是{0}\".format(i.text))\n",
    "            # 将文本内容添加到列表中保存\n",
    "            list.append(text)\n",
    "        time.sleep(2)\n",
    "        # 如果页数不等于1，则进行翻页，然后获取每一页的数据信息\n",
    "        if totalcount!=1:\n",
    "            for i in range(1,totalcount):\n",
    "                 self.get_element(nextpage).click()\n",
    "                 self.wait_utilVisible(locator)\n",
    "                 ele=self.driver.find_elements_by_xpath(locator)\n",
    "                 # 对返回的页面元素遍历\n",
    "                 for i in ele:\n",
    "            # 返回页面元素的文本内容\n",
    "                    text=i.text\n",
    "                    logging.info(\"该元素的内容是{0}\".format(i.text))\n",
    "            # 将文本内容添加到列表中保存\n",
    "                    list.append(text)\n",
    "        return list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'get_page_size' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-67-7313980bcbf0>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mget_table_list\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-66-02cde4171e90>\u001b[0m in \u001b[0;36mget_table_list\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mget_table_list\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m         \u001b[1;31m# 返回该元素定位的所有元素集合\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m         \u001b[0mtotalcount\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mget_page_size\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtotalitem\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpagesize\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m         \u001b[0mlogging\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"当前总页数是{0}\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtotalcount\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m         \u001b[0mlist\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'get_page_size' is not defined"
     ]
    }
   ],
   "source": [
    "get_table_list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "element.click"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_id('PageNext')\n",
    "for i in range(0,26):\n",
    "    element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 填写关键词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('//*[@id=\"gradetxt\"]/dd[1]/div[2]/input')\n",
    "element.clear()\n",
    "element.send_keys('人工智能新闻')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('//div[@class=\"search-butt\"]')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
